
%  =======================================================================================================================================================
% 
%  Code Description: 
%  This code file builds the IFS-Base dataset from the raw data provided by the Bundesbank's Research Data and Service Centre (RDSC).
% 
%  =======================================================================================================================================================
% 
% 
%  General disclaimer:
%  This directory contains the replication code for "Connected Funds". 
%  Because we cannot share the underlying data provided by the Bundesbank's Research Data and Service Centre (RDSC) and other subscription data sources, 
%  we have included pseudo data to show how the raw data are formatted. 
%  Other researchers can go through a similar approval and subscription process to obtain the underlying data. (2023-04-06)
% 
%  =======================================================================================================================================================


clear all;
close all;

% Root folder of the project; every code and data path below is built from it.
projectPath = 'C:\ConnectedFunds_Codebase\';

% Add functions folders
addpath(strcat(projectPath, 'Code\matlab_functions'));

%%% now glue together the data from Hauptpos in one big dataset

% Lookup vectors prepared elsewhere:
%   umonth  - months to process (numeric, compared against 201412 below)
%   ufund   - not used in the loop, only re-saved together with IFS_All
%   uwpkenn - loaded but not referenced anywhere in this script
%   usubpos - item codes; each one becomes a column of the wide amount table
load(strcat(projectPath, 'Data\IFS\mat\umonth'), 'umonth');
load(strcat(projectPath, 'Data\IFS\mat\ufund'), 'ufund');
load(strcat(projectPath, 'Data\IFS\mat\uwpkenn'), 'uwpkenn');
load(strcat(projectPath, 'Data\IFS\mat\usubpos'), 'usubpos');

for t = 1:length(umonth)

    % Echo the month being processed as a progress indicator.
    umonth(t),

    load(strcat(projectPath, 'Data\IFS\mat\IFS_Hauptpos_', num2str(umonth(t))), 'IFS_Hauptpos');

    % Funds reporting this month. The second output of unique is the row of
    % the first occurrence of each fund (default behaviour of unique), which
    % is the row the fund-level descriptors are taken from further down.
    [ufund_t, firstRowOfFund] = unique(IFS_Hauptpos.ISIN);
    [~,idxWhereFund]   = ismember(IFS_Hauptpos.ISIN,ufund_t);
    [~,idxWhereSubpos] = ismember(IFS_Hauptpos.SUBPOS,usubpos);

    % An item code missing from usubpos yields index 0, which accumarray
    % rejects with an unhelpful message; fail early and say what is wrong.
    if any(idxWhereSubpos == 0)
        error('IFS:unknownSubpos', ...
              'Month %s contains SUBPOS values that are not listed in usubpos.', ...
              num2str(umonth(t)));
    end

    % Long -> wide: one row per fund, one column per item, amounts summed
    % within each (fund, item) cell; cells without any record are NaN.
    amounts = accumarray([idxWhereFund idxWhereSubpos],IFS_Hauptpos.AMOUNT,[length(ufund_t) length(usubpos)],[],NaN);

    % replace missing values with zeros for items that were reported by other funds
    % (columns that are NaN for every fund are left untouched)
    nMissing = sum(isnan(amounts), 1);
    partial  = nMissing > 0 & nMissing < size(amounts, 1);
    block    = amounts(:, partial);
    block(isnan(block)) = 0;
    amounts(:, partial) = block;

    tmp = array2table(amounts);
    tmp.Properties.VariableNames = cellstr(usubpos)';

    % Fund-level descriptor columns, addressed by position. The raw table has
    % a different layout from 201412 onwards.
    % NOTE(review): presumably column 18 (old layout) and column 32 (new
    % layout) hold the same identifier -- confirm against the raw data.
    if umonth(t) < 201412
        infoCols = [18 2:11];
    else
        infoCols = [32 2:23];
    end
    tmp_ifs = IFS_Hauptpos(firstRowOfFund, infoCols);

    IFS = [tmp_ifs tmp];

    if t == 1
        IFS_All = IFS;
    else
        % Columns that show up for the first time this month must exist in
        % the accumulated table before the two can be stacked. The filler
        % type follows the type of the incoming column so that text columns
        % stay cell columns instead of being created as numeric NaN.
        colmissing = setdiff(IFS.Properties.VariableNames, IFS_All.Properties.VariableNames);
        for c = 1:numel(colmissing)
            name = colmissing{c};
            if iscellstr(IFS.(name))
                IFS_All.(name) = repmat({''}, height(IFS_All), 1);
            elseif iscell(IFS.(name))
                IFS_All.(name) = cell(height(IFS_All), 1);
            else
                IFS_All.(name) = NaN(height(IFS_All), 1);
            end
        end

        % Vertical concatenation of tables matches variables by name.
        IFS_All = [IFS_All; IFS];
    end

    % Drop all per-month temporaries (i* also covers idxWhere* and infoCols).
    clear i* tmp* ufund_t firstRowOfFund amounts nMissing partial block colmissing c name IFS_Hauptpos IFS;
end

save(strcat(projectPath, 'Data\IFS\IFS_All'), 'IFS_All', 'ufund', 'umonth');